library(ggh4x)
library(tidyverse)
library(stringr)
library(modelsummary)
options(modelsummary_factory_latex = 'kableExtra')
library(kableExtra)
library(cowplot)
library(grid)
library(gridExtra)
library(scales)

# Set the directory to the location of the replication data
# NOTE: the empty string is a placeholder -- fill in the path to the
# replication folder before running. The relative paths used below
# ("ggplot_theme.R", "Data/...", "Figures/...") are resolved from there.
setwd("")

# Run the project's plotting helper script (presumably where my.theme(), used
# by the figures further down, is defined -- confirm in ggplot_theme.R)
source("ggplot_theme.R")

# Was saved as rds rather than csv to keep factor level orderings
V <- read_rds("Data/Vignette_Data.rds")

# Sample sizes (Appendix C in text and tables)

# Number of distinct respondent ids in V for one country / respondent type
# pair. Ids are de-duplicated before counting.
count_respondents <- function(country_name, type_name) {
  in_cell <- V$country == country_name & V$respondent_type == type_name
  length(unique(V$id[in_cell]))
}

# Citizen samples
n_us_citizen <- count_respondents("United States", "Citizen")
n_be_citizen <- count_respondents("Belgium", "Citizen")
n_cl_citizen <- count_respondents("Chile", "Citizen")
n_dk_citizen <- count_respondents("Denmark", "Citizen")

# Politician samples
n_us_politician <- count_respondents("United States", "Politician")
n_be_politician <- count_respondents("Belgium", "Politician")
n_cl_politician <- count_respondents("Chile", "Politician")
n_dk_politician <- count_respondents("Denmark", "Politician")


# Sentence in "Experimental Design" section regarding sample sizes
# (pooled over all four countries; one count per distinct respondent id)
n_politicians_all <- length(unique(V$id[V$respondent_type == "Politician"]))
n_citizens_all <- length(unique(V$id[V$respondent_type == "Citizen"]))

cat(paste0("\npoliticians (N$~=~$", comma(n_politicians_all), ") ",
           "and citizens (N$~=~$", comma(n_citizens_all), ") ",
           "in the United States, Denmark, Belgium, and Chile.\n\n"))

# Sentence in Appendix C.1 regarding citizen sample sizes
cat(paste0("\nThe resulting sample sizes for each citizen survey were ",
           comma(n_us_citizen), " (United States), ",
           comma(n_be_citizen), " (Belgium), ",
           comma(n_cl_citizen), " (Chile), and ",
           comma(n_dk_citizen), " (Denmark).\n\n"))


# TABLES IN APPENDIX C
# Output Latex tables to compare the population and sample

# Define US regions
# State names belonging to each of the four regions used to aggregate the
# state-level answers of US citizens further below. Each vector is laid out
# one geographic sub-division per line.
northeast <- c(
  # New England
  "Connecticut", "Maine", "Massachusetts", "New Hampshire", "Rhode Island", "Vermont",
  # Middle Atlantic
  "New Jersey", "New York", "Pennsylvania"
)

midwest <- c(
  # East North Central
  "Illinois", "Indiana", "Michigan", "Ohio", "Wisconsin",
  # West North Central
  "Iowa", "Kansas", "Minnesota", "Missouri", "Nebraska", "North Dakota", "South Dakota"
)

south <- c(
  # South Atlantic (including the District of Columbia)
  "Delaware", "Florida", "Georgia", "Maryland", "North Carolina",
  "South Carolina", "Virginia", "District of Columbia", "West Virginia",
  # East South Central
  "Alabama", "Kentucky", "Mississippi", "Tennessee",
  # West South Central
  "Arkansas", "Louisiana", "Oklahoma", "Texas"
)

west <- c(
  # Mountain
  "Arizona", "Colorado", "Idaho", "Montana", "Nevada", "New Mexico", "Utah", "Wyoming",
  # Pacific
  "Alaska", "California", "Hawaii", "Oregon", "Washington"
)

# ORDER FACTOR LEVELS
# The level vectors are spelled out first and applied in one mutate() below.
# Any value that is not listed in a level vector becomes NA when the column
# is converted with factor().

levels_resp_gender <- c("Male", "Female")

# Two sets of age bands are interleaved (18-24/25-34/... and 18-26/27-36/...)
levels_resp_age_group <- c(
  "18-24", "18-26",
  "25-34", "27-36",
  "35-44", "37-46",
  "45-54", "47-56",
  "55-64", "57-66",
  "65+", "67+"
)

levels_resp_education <- c(
  "Below high school", "Secondary school",
  "Higher non-university", "University degree"
)

levels_dynata_edu <- c("Low", "Middle", "High")

levels_resp_ideology <- c("Right-wing", "Center", "Left-wing")

levels_resp_exposure <- c(
  "Has not experienced social media harassment",
  "Experienced social media harassment"
)

# Party identification of respondents; "Other" and "Does not identify" are
# placed last
levels_resp_party <- c(
  "Alternativet (ALT)",
  "CD&V",
  "Comunes",
  "Convergencia Social",
  "Dansk Folkeparti (DF)",
  "Democratic Party",
  "Det Konservative Folkeparti (KF)",
  "Enhedslisten (EL)",
  "Evolución Política",
  "Federación Regionalista Verde Social",
  "Frie Grønne (FG)",
  "Groen",
  "Kristendemokraterne (KD)",
  "Liberal Alliance (LA)",
  "N-VA",
  "Nye Borgerlige (NB)",
  "Open Vld",
  "Partido Comunista de Chile",
  "Partido de la Gente",
  "Partido Demócrata Cristiano",
  "Partido Liberal de Chile",
  "Partido por la Democracia",
  "Partido Radical de Chile",
  "Partido Republicano de Chile",
  "Partido Socialista de Chile",
  "PvdA",
  "Radikale Venstre (RV)",
  "Renovación Nacional",
  "Republican Party",
  "Revolución Democrática",
  "Socialdemokratiet (S)",
  "Socialistisk Folkeparti (SF)",
  "Unión Demócrata Independiente",
  "Venstre (V)",
  "Vlaams Belang",
  "Vooruit",
  "Other",
  "Does not identify"
)

# Party categories used for the politician sample / population comparison
levels_pop_party <- c(
  "Social-Democratic",
  "Liberal",
  "Christian-Democratic/Conservative",
  "Radical Left",
  "Radical Right",
  "Green",
  "Nueva Mayoria",
  "IND",
  "Chile Vamos",
  "Frente Amplio",
  "Democratic Party",
  "Republican Party",
  "Special Interest",
  "CD&V",
  "N-VA",
  "Open Vld",
  "Vooruit",
  "Groen",
  "Vlaams Belang",
  "PvdA",
  "Other",
  "Independent from national parties",
  "Does not identify",
  "Not linked to any parties"
)

V <- V %>%
  mutate(
    country = factor(country),
    resp_gender = factor(resp_gender, levels = levels_resp_gender),
    resp_age_group = factor(resp_age_group, levels = levels_resp_age_group),
    resp_education = factor(resp_education, levels = levels_resp_education),
    dynata_edu = factor(dynata_edu, levels = levels_dynata_edu),
    resp_ideology_discrete = factor(resp_ideology_discrete, levels = levels_resp_ideology),
    resp_exposure_binary = factor(resp_exposure_binary, levels = levels_resp_exposure),
    resp_party = factor(resp_party, levels = levels_resp_party),
    pop_party = factor(pop_party, levels = levels_pop_party)
  )



# CITIZEN DESCRIPTIVES

# Population benchmarks for the socio-demographic groups; merged with the
# sample shares further below
Citizen_Population_Values <- read_rds("Data/Citizen_Population_Values.rds")

# The sample shares computed from here on are the within-country percentages
# of citizen respondents in each socio-demographic group, so that the sample
# can be set against the population values (i.e. to examine
# representativeness).

# Gender: one row per respondent id, missing gender dropped
Gender <- V %>%
  filter(respondent_type == "Citizen") %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(resp_gender), resp_gender != "Other") %>%
  group_by(country, resp_gender) %>%
  summarize(count_collected = n()) %>%
  group_by(country) %>%
  mutate(
    percentage = count_collected / sum(count_collected) * 100,
    group = recode(resp_gender, "Male" = "Gender: Men", "Female" = "Gender: Women")
  ) %>%
  select(group, country, percentage)

# Age: within-country percentage of citizen respondents in each age band
# (one row per respondent id, missing age band dropped).
#
# The row label is built by prefixing every age band with "Age: ". The earlier
# recode() only relabelled the 18-24/25-34/.../65+ bands, so the alternative
# bands declared in the factor levels (18-26, 27-36, ..., 67+) kept their bare
# label and could not match the "Age: <band>" row levels used when the
# comparison table is ordered, which would have turned them into NA rows.
Age_Group <- V %>%
             filter(respondent_type == "Citizen") %>%
             filter(!duplicated(id) & !is.na(resp_age_group)) %>%
             group_by(country, resp_age_group) %>%
             summarize(count_collected = n()) %>%
             ungroup() %>%
             group_by(country) %>%
             mutate(percentage = count_collected / sum(count_collected) * 100) %>%
             mutate(group = paste0("Age: ", resp_age_group)) %>%
             select(group, country, percentage)

# Education: within-country percentage of citizen respondents in each
# education tier (one row per respondent id, missing education dropped)
Education <- V %>%
  filter(respondent_type == "Citizen") %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(dynata_edu)) %>%
  group_by(country, dynata_edu) %>%
  summarize(count_collected = n()) %>%
  group_by(country) %>%
  mutate(
    percentage = count_collected / sum(count_collected) * 100,
    group = recode(dynata_edu,
                   "Low" = "Education: Low",
                   "Middle" = "Education: Middle",
                   "High" = "Education: High")
  ) %>%
  select(group, country, percentage)

# Region: within-country percentage of citizen respondents per region.
# Respondent ids are de-duplicated first, as for gender, age and education,
# so that the shares are computed over respondents rather than over rows of V
# (this filter was previously missing here).
# Rows with a missing region or the literal answer "Andere" are excluded.
Region <- V %>%
          filter(respondent_type == "Citizen") %>%
          filter(!duplicated(id) & !is.na(resp_region) & resp_region != "Andere") %>%
          group_by(country, resp_region) %>%
          summarize(count_collected = n()) %>%
          ungroup() %>%
          group_by(country) %>%
          mutate(percentage = count_collected / sum(count_collected) * 100) %>%
          mutate(group = str_c("Region: ", resp_region)) %>%
          select(group, country, percentage)


# Aggregate states to regions in the United States for the sample data
# First remove the "Region: " prefix that precedes the state name,
# e.g. "Region: Alaska"
Region$group[Region$country == "United States"] <- str_replace_all(Region$group[Region$country == "United States"], "^Region: ", "")

# Map each state to its region; a US value that is in none of the four state
# vectors keeps region NA and therefore ends up in an NA group below
Region$region <- NA
Region$region[Region$group %in% northeast] <- "Region: Northeast"
Region$region[Region$group %in% midwest] <- "Region: Midwest"
Region$region[Region$group %in% south] <- "Region: South"
Region$region[Region$group %in% west] <- "Region: West"
Region$group[Region$country == "United States"] <- Region$region[Region$country == "United States"]

# Sum the state-level percentages within each region label (for the other
# countries every label is already unique, so their values are unchanged)
Region <- Region %>%
          ungroup() %>%
          group_by(country, group) %>%
          summarize(percentage = sum(percentage))

# Party identification: within-country percentage of citizen respondents per
# party. Respondent ids are de-duplicated first, as for gender, age and
# education, so that the shares are computed over respondents rather than over
# rows of V (this filter was previously missing here).
Party_ID <- V %>%
            filter(respondent_type == "Citizen") %>%
            filter(!duplicated(id) & !is.na(resp_party)) %>%
            group_by(country, resp_party) %>%
            summarize(count_collected = n()) %>%
            ungroup() %>%
            group_by(country) %>%
            mutate(percentage = count_collected / sum(count_collected) * 100) %>%
            mutate(group = str_c("Party ID: ", resp_party)) %>%
            select(group, country, percentage)

# Stack the five sample-share tables into one long table
Sample_Values <- bind_rows(Gender, Age_Group, Education, Region, Party_ID)

# Row order of the comparison tables: gender, age, education, region, party.
# Gender and region labels are taken in the order they occur in the sample;
# the other labels follow the factor level order set on V.
gender_rows <- unique(Sample_Values$group[str_detect(Sample_Values$group, "^Gender")])
region_rows <- unique(Sample_Values$group[str_detect(Sample_Values$group, "^Region")])
citizen_row_order <- c(gender_rows,
                       paste0("Age: ", levels(V$resp_age_group)),
                       paste0("Education: ", levels(V$dynata_edu)),
                       region_rows,
                       paste0("Party ID: ", levels(V$resp_party)))

# Combine the population data and the citizen sample data for comparison.
# all.x = TRUE keeps sample rows without a population counterpart; their
# pop_percentage is NA.
B <- merge(Sample_Values, Citizen_Population_Values, sort = FALSE,
           by = c("country", "group"), all.x = TRUE) %>%
  select(country, group, percentage, pop_percentage) %>%
  mutate(group = factor(group, levels = citizen_row_order)) %>%
  arrange(group)


# TABLES IN APPENDIX C
# Output Latex tables to compare the population and sample

# Percentages as text with one decimal place; a missing population value is
# shown as "--"
B$percentage <- formatC(B$percentage, format = "f", flag = "0", digits = 1)
B$pop_percentage <- formatC(B$pop_percentage, format = "f", flag = "0", digits = 1)
B$pop_percentage <- replace(B$pop_percentage, B$pop_percentage == "NA", "--")

# Column headers as they should appear in the LaTeX output
latex_headers <- c(group = " ",
                   percentage = "\\bf Sample \\%",
                   pop_percentage = "\\bf Population \\%")
needs_header <- names(B) %in% names(latex_headers)
names(B)[needs_header] <- unname(latex_headers[names(B)[needs_header]])

# LaTeX table comparing the citizen sample with the population for one country.
#   country_name  value of B$country whose rows are shown
#   label_code    suffix of the LaTeX label (table:citizen_sample_<code>)
#   place         country name as written at the end of the title
#   n_respondents sample size reported in the footnote
citizen_comparison_table <- function(country_name, label_code, place, n_respondents) {
  datasummary_df(B[B$country == country_name,
                   c(" ", "\\bf Sample \\%", "\\bf Population \\%")],
                 output = "latex",
                 escape = FALSE,
                 title = paste0("\\label{table:citizen_sample_", label_code,
                                "}\\bf Comparison between the citizen sample and the population in ",
                                place),
                 align = "lcc") %>%
    footnote(paste0("n = ", scales::comma(n_respondents)), general_title = "")
}

# TABLE C6
citizen_comparison_table("United States", "us", "the United States", n_us_citizen)

# TABLE C7
citizen_comparison_table("Belgium", "be", "Belgium", n_be_citizen)

# TABLE C8
citizen_comparison_table("Chile", "cl", "Chile", n_cl_citizen)

# TABLE C9
citizen_comparison_table("Denmark", "dk", "Denmark", n_dk_citizen)




# POLITICIAN DESCRIPTIVES

# Denominators of the response rates (judging by the names, the number of
# e-mail invitations per country -- confirm against Appendix C.2)
n_us_pol_emails <- 11126
n_dk_pol_emails <- 2821
n_be_pol_emails <- 6659
n_cl_pol_emails <- 2700

# Response rates in percent (Appendix C.2): respondents / denominator * 100

# US politician response rate
n_us_politician / n_us_pol_emails * 100

# DK politician response rate
n_dk_politician / n_dk_pol_emails * 100

# BE politician response rate
n_be_politician / n_be_pol_emails * 100

# CL politician response rate
n_cl_politician / n_cl_pol_emails * 100

# Population benchmarks for the politician samples
Politician_Population_Values <- read_rds("Data/Politician_Population_Values.rds")

# Lookup key of every population row: country name followed by group label
pop_key <- paste0(Politician_Population_Values$country,
                  Politician_Population_Values$group)

# Gender: within-country percentage of politician respondents
# (one row per respondent id, missing gender dropped)
Gender <- V %>%
  filter(respondent_type == "Politician") %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(resp_gender), resp_gender != "Other") %>%
  group_by(country, resp_gender) %>%
  summarize(count_collected = n()) %>%
  group_by(country) %>%
  mutate(
    percentage = count_collected / sum(count_collected) * 100,
    group = recode(resp_gender, "Male" = "Gender: Men", "Female" = "Gender: Women")
  ) %>%
  select(group, country, percentage)

# Attach the population percentage of the same country / group (NA if absent)
gender_key <- paste0(Gender$country, Gender$group)
Gender$pop_percentage <- Politician_Population_Values$percentage[match(gender_key, pop_key)]

# Party: within-country percentage of politician respondents per party
# category (one row per respondent id, missing party dropped)
Party_ID <- V %>%
  filter(respondent_type == "Politician") %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(pop_party)) %>%
  group_by(country, pop_party) %>%
  summarize(count_collected = n()) %>%
  group_by(country) %>%
  mutate(
    percentage = count_collected / sum(count_collected) * 100,
    group = str_c("Party ID: ", pop_party)
  ) %>%
  select(group, country, percentage) %>%
  ungroup()

party_key <- paste0(Party_ID$country, Party_ID$group)
Party_ID$pop_percentage <- Politician_Population_Values$percentage[match(party_key, pop_key)]


# Stack the politician gender and party shares into one table
B <- bind_rows(Gender, Party_ID)

# Row order: gender labels as they occur, then the party categories in the
# factor level order set on V
politician_gender_rows <- unique(B$group[str_detect(B$group, "^Gender")])
politician_party_rows <- paste0("Party ID: ", levels(V$pop_party))

B <- B %>%
  mutate(group = factor(group, levels = c(politician_gender_rows,
                                          politician_party_rows))) %>%
  arrange(group)


# Output tables
# Percentages as text with one decimal place; a missing population value is
# shown as "--"
B$percentage <- formatC(B$percentage, format = "f", flag = "0", digits = 1)
B$pop_percentage <- formatC(B$pop_percentage, format = "f", flag = "0", digits = 1)
B$pop_percentage <- replace(B$pop_percentage, B$pop_percentage == "NA", "--")

# Column headers as they should appear in the LaTeX output
politician_headers <- c(group = " ",
                        percentage = "\\bf Sample \\%",
                        pop_percentage = "\\bf Population \\%")
header_cols <- names(B) %in% names(politician_headers)
names(B)[header_cols] <- unname(politician_headers[names(B)[header_cols]])


# ** NOTE 1: US politician data are manually inputted into Appendix C from a
# table from the US survey provider, CivicPulse, regarding the difference
# between the sample and the population from which they are sampling.

# ** NOTE 2: The demographic comparison tables in this replication file do _not_
# include political affiliation to protect the anonymity of politician
# respondents.

# LaTeX table comparing the politician sample with the politician population
# for one country.
#   country_name  value of B$country whose rows are shown (also used in title)
#   label_code    suffix of the LaTeX label (table:politician_sample_<code>)
#   n_respondents sample size reported in the footnote
politician_comparison_table <- function(country_name, label_code, n_respondents) {
  datasummary_df(B[B$country == country_name,
                   c(" ", "\\bf Sample \\%", "\\bf Population \\%")],
                 output = "latex",
                 escape = FALSE,
                 title = paste0("\\label{table:politician_sample_", label_code,
                                "}\\bf Comparison between the politician sample and the politician population in ",
                                country_name),
                 align = "lcc") %>%
    footnote(paste0("n = ", scales::comma(n_respondents)), general_title = "")
}

# TABLE C11
politician_comparison_table("Denmark", "dk", n_dk_politician)

# TABLE C12
politician_comparison_table("Belgium", "be", n_be_politician)

# TABLE C13
politician_comparison_table("Chile", "cl", n_cl_politician)






# FIGURE 1
# POLICY PREFERENCE DESCRIPTIVES

# How concerned are you about disrespectful comments sent to politicians on
# social media? (Not concerned at all, Slightly concerned, Somewhat concerned,
#                Moderately concerned, Extremely concerned)
#
# To what extent do you disagree or agree that stronger government action
# should be taken to restrict disrespectful discourse on social media
# (Strongly disagree, Somewhat disagree, Slightly disagree,
#  Slightly agree, Somewhat agree, Strongly agree)

# Collapse a numerically coded scale into a two-level factor.
#   x          vector of scale codes
#   low_codes  codes mapped to labels[1]
#   high_codes codes mapped to labels[2]
#   labels     the two labels, in factor level order
# Codes in neither set (including NA) give NA.
dichotomize_scale <- function(x, low_codes, high_codes, labels) {
  out <- rep(NA_character_, length(x))
  out[x %in% low_codes] <- labels[1]
  out[x %in% high_codes] <- labels[2]
  factor(out, levels = labels)
}

V <- V %>%
  mutate(
    # codes 1-3 vs 4-5 of the concern item
    concern_disrespect_binary = dichotomize_scale(concern_disrespect, 1:3, 4:5,
                                                  c("Low\nconcern", "High\nconcern")),
    # codes 1-3 vs 4-6 of the government action item
    govt_action_binary = dichotomize_scale(govt_action, 1:3, 4:6,
                                           c("Do not\nwant more", "Want\nmore"))
  )

# PANEL A OF FIGURE D1 (CONCERN WITH TOXICITY)

# One row per respondent id with non-missing gender and concern answer;
# shared by the overall and the by-gender summaries
G1A_Respondents <- V %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(resp_gender), !is.na(concern_disrespect_binary))

# Percentage in each concern category within respondent type
G1A_Overall <- G1A_Respondents %>%
  group_by(respondent_type, concern_disrespect_binary) %>%
  summarize(n = n()) %>%
  group_by(respondent_type) %>%
  mutate(percent = n / sum(n) * 100) %>%
  mutate(respondent_type = case_when(respondent_type == "Citizen" ~ "Citizen",
                                     respondent_type == "Politician" ~ "Politician"),
         category = "Overall",
         group = "Overall")

# Percentage in each concern category within gender x respondent type
G1A_Gender <- G1A_Respondents %>%
  group_by(resp_gender, respondent_type, concern_disrespect_binary) %>%
  summarize(n = n()) %>%
  group_by(resp_gender, respondent_type) %>%
  mutate(percent = n / sum(n) * 100) %>%
  mutate(respondent_type = case_when(respondent_type == "Citizen" ~ "Citizen",
                                     respondent_type == "Politician" ~ "Politician")) %>%
  mutate(resp_gender = recode(resp_gender, "Female" = "Women", "Male" = "Men")) %>%
  rename(category = resp_gender) %>%
  mutate(group = "By gender")

# Only the "High concern" rows are plotted; the factor levels fix the facet,
# axis and bar order
G1A <- bind_rows(filter(G1A_Overall, concern_disrespect_binary == "High\nconcern"),
                 filter(G1A_Gender, concern_disrespect_binary == "High\nconcern")) %>%
  mutate(group = factor(group, levels = c("Overall", "By gender")),
         respondent_type = factor(respondent_type, levels = c("Politician", "Citizen")),
         category = factor(category, levels = c("Women", "Men", "Overall")))

# Significance tests for Panel A of Figure D1
# Same respondent filter as the figure data (one row per id, non-missing
# gender and concern answer). The factor levels are reordered so that
# "High concern" is the first column and Politician / Female the first row of
# the tables below; prop.test() reads the first column of a two-column table
# as the success counts.
Prop_Tests_1 <- V %>%
                filter(!duplicated(id) &
                       !is.na(resp_gender) &
                       !is.na(concern_disrespect_binary)) %>%
                mutate(concern_disrespect_binary = factor(concern_disrespect_binary,
                                                          levels = c("High\nconcern", "Low\nconcern")),
                       respondent_type = factor(respondent_type, levels = c("Politician", "Citizen")),
                       resp_gender = factor(resp_gender, levels = c("Female", "Male")))

# Are politicians more concerned than citizens about uncivil comments
# sent toward politicians?
# (respondent type x concern table; the full test result and then its p-value
# are printed)
P1 <- prop.test(table(Prop_Tests_1$respondent_type,
                      Prop_Tests_1$concern_disrespect_binary))
P1
P1$p.value

# Are women politicians more concerned than men politicians about uncivil
# comments sent toward politicians?
# (gender x concern table, politicians only)
P2 <- prop.test(table(Prop_Tests_1$resp_gender[Prop_Tests_1$respondent_type == "Politician"],
                      Prop_Tests_1$concern_disrespect_binary[Prop_Tests_1$respondent_type == "Politician"]))
P2
P2$p.value

# Are women citizens more concerned than men citizens about uncivil
# comments sent toward politicians?
# (gender x concern table, citizens only)
P3 <- prop.test(table(Prop_Tests_1$resp_gender[Prop_Tests_1$respondent_type == "Citizen"],
                      Prop_Tests_1$concern_disrespect_binary[Prop_Tests_1$respondent_type == "Citizen"]))
P3
P3$p.value


# Panel A bar chart: percentage with high concern (G1A), one bar per
# respondent type and category, in an "Overall" and a "By gender" facet.
Figure_1A <- ggplot(G1A, aes(y = percent, x = respondent_type,
                             fill = category)) +
  my.theme(base_size = 7, borderless = 2) +
  labs(x = "% with high concern\nabout comments toward politicians", y = "") +
  # The limits argument is spelled out as `ylim`; the former `y = ...` only
  # reached it through partial argument matching
  coord_cartesian(ylim = c(0, 85), expand = FALSE) +
  facet_wrap(~ group) +
  # The first facet ("Overall") gets half the width of the "By gender" facet
  force_panelsizes(cols = c(0.5, 1)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.7) +
  # Baseline at zero. NOTE(review): `size` for line geoms is deprecated in
  # favour of `linewidth` as of ggplot2 3.4.0; kept so older versions still run
  geom_hline(yintercept = 0, size = 1) +
  # Rounded percentage printed above each bar
  geom_text(aes(label = paste0(round(percent), "%")),
            position = position_dodge(width = 0.9),
            size = 2.5, vjust = -1) +
  # No breaks on the y axis; the bar labels carry the values
  scale_y_continuous(breaks = c()) +
  scale_fill_manual(values = c("Women" = "#00BD8E", "Men" = "#5200FF", "Overall" = "grey20")) +
  theme(legend.position = "none",
        strip.text.x = element_text(angle = 0, hjust = 0, size = 7, face = "bold"))



# PANEL B OF FIGURE D1 (DESIRE FOR MORE GOVERNMENT ACTION ON TOXICITY)

# One row per respondent id with non-missing gender and government action
# answer; shared by the overall and the by-gender summaries
G1B_Respondents <- V %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(resp_gender), !is.na(govt_action_binary))

# Percentage in each government action category within respondent type
G1B_Overall <- G1B_Respondents %>%
  group_by(respondent_type, govt_action_binary) %>%
  summarize(n = n()) %>%
  group_by(respondent_type) %>%
  mutate(percent = n / sum(n) * 100) %>%
  mutate(respondent_type = case_when(respondent_type == "Citizen" ~ "Citizen",
                                     respondent_type == "Politician" ~ "Politician"),
         category = "Overall",
         group = "Overall")

# Percentage in each government action category within gender x respondent type
G1B_Gender <- G1B_Respondents %>%
  group_by(resp_gender, respondent_type, govt_action_binary) %>%
  summarize(n = n()) %>%
  group_by(resp_gender, respondent_type) %>%
  mutate(percent = n / sum(n) * 100) %>%
  mutate(respondent_type = case_when(respondent_type == "Citizen" ~ "Citizen",
                                     respondent_type == "Politician" ~ "Politician")) %>%
  mutate(resp_gender = recode(resp_gender, "Female" = "Women", "Male" = "Men")) %>%
  rename(category = resp_gender) %>%
  mutate(group = "By gender")

# Only the "Want more" rows are plotted; the factor levels fix the facet,
# axis and bar order
G1B <- bind_rows(filter(G1B_Overall, govt_action_binary == "Want\nmore"),
                 filter(G1B_Gender, govt_action_binary == "Want\nmore")) %>%
  mutate(group = factor(group, levels = c("Overall", "By gender")),
         respondent_type = factor(respondent_type, levels = c("Politician", "Citizen")),
         category = factor(category, levels = c("Women", "Men", "Overall")))


# Significance tests for Panel B of Figure D1
# Same respondent filter as the figure data (one row per id, non-missing
# gender and government action answer). The factor levels are reordered so
# that "Want more" is the first column and Politician / Female the first row
# of the tables below; prop.test() reads the first column of a two-column
# table as the success counts.
Prop_Tests_2 <- V %>%
                filter(!duplicated(id) &
                       !is.na(resp_gender) &
                       !is.na(govt_action_binary)) %>%
                mutate(govt_action_binary = factor(govt_action_binary,
                                                          levels = c("Want\nmore", "Do not\nwant more")),
                       respondent_type = factor(respondent_type, levels = c("Politician", "Citizen")),
                       resp_gender = factor(resp_gender, levels = c("Female", "Male")))

# Do politicians want more government action on uncivil discourse than citizens?
# (respondent type x government action table; the full test result and then
# its p-value are printed)
P4 <- prop.test(table(Prop_Tests_2$respondent_type,
                      Prop_Tests_2$govt_action_binary))
P4
P4$p.value

# Do women politicians want more government action on uncivil discourse
# than men politicians?
# (gender x government action table, politicians only)
P5 <- prop.test(table(Prop_Tests_2$resp_gender[Prop_Tests_2$respondent_type == "Politician"],
                      Prop_Tests_2$govt_action_binary[Prop_Tests_2$respondent_type == "Politician"]))
P5
P5$p.value

# Do women citizens want more government action on uncivil discourse
# than men citizens?
# (gender x government action table, citizens only)
P6 <- prop.test(table(Prop_Tests_2$resp_gender[Prop_Tests_2$respondent_type == "Citizen"],
                      Prop_Tests_2$govt_action_binary[Prop_Tests_2$respondent_type == "Citizen"]))
P6
P6$p.value

# Panel B bar chart: percentage wanting more government action (G1B), one bar
# per respondent type and category, in an "Overall" and a "By gender" facet.
Figure_1B <- ggplot(G1B, aes(y = percent, x = respondent_type,
                             fill = category)) +
  my.theme(base_size = 7, borderless = 2) +
  labs(x = "% who want more\ngovernment action", y = "") +
  # The limits argument is spelled out as `ylim`; the former `y = ...` only
  # reached it through partial argument matching
  coord_cartesian(ylim = c(0, 85), expand = FALSE) +
  facet_wrap(~ group) +
  # The first facet ("Overall") gets half the width of the "By gender" facet
  force_panelsizes(cols = c(0.5, 1)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.7) +
  # Baseline at zero. NOTE(review): `size` for line geoms is deprecated in
  # favour of `linewidth` as of ggplot2 3.4.0; kept so older versions still run
  geom_hline(yintercept = 0, size = 1) +
  # Rounded percentage printed above each bar
  geom_text(aes(label = paste0(round(percent), "%")),
            position = position_dodge(width = 0.9),
            size = 2.5, vjust = -1) +
  # No breaks on the y axis; the bar labels carry the values
  scale_y_continuous(breaks = c()) +
  scale_fill_manual(values = c("Women" = "#00BD8E", "Men" = "#5200FF", "Overall" = "grey20")) +
  theme(legend.position = "none",
        strip.text.x = element_text(angle = 0, hjust = 0, size = 7, face = "bold"))

# Sample sizes for figure

# Formatted number of respondents of one type that enter a panel: first row of
# each id, with non-missing gender and a non-missing value in `outcome_col`.
figure_n <- function(outcome_col, type_name) {
  keep <- !duplicated(V$id) &
    !is.na(V$resp_gender) &
    !is.na(V[[outcome_col]]) &
    V$respondent_type == type_name
  # rows where the condition is NA are not counted
  scales::comma(sum(keep, na.rm = TRUE))
}

cat("\n\nCitizen sample respondents (Panel A)$~=~$",
    figure_n("concern_disrespect_binary", "Citizen"),
    "; politician sample respondents (Panel A)$~=~$",
    figure_n("concern_disrespect_binary", "Politician"),
    "; citizen sample respondents (Panel B)$~=~$",
    figure_n("govt_action_binary", "Citizen"),
    "; politician sample respondents (Panel B)$~=~$",
    figure_n("govt_action_binary", "Politician"),
    ".\n\n",
    sep = "")

# FIGURE D1
# Differences in statistical significance are added post-hoc in Illustrator
# which are based on the p-values calculated for P1-P6 in the above code
#
# The two panels are placed side by side on a 5.25 x 2.8 inch PDF page, with
# the panel letters and titles drawn along the top edge.
# The composed plot is passed to print() explicitly: a bare plot expression is
# only drawn when R auto-prints top-level values, which does not happen when
# this script is run through source(), leaving an empty PDF.
pdf("Figures/Main_Descriptives-RAW.pdf", 5.25, 2.8)
print(
  ggdraw() +
    draw_plot(Figure_1A, x = -0.02, y = -0.01, width = 0.50, height = 0.8) +
    draw_plot(Figure_1B, x = 0.5, y = -0.01, width = 0.50, height = 0.8) +
    draw_plot_label(label = c("A.", "B."),
                    x = c(0.01, 0.53), y = c(0.992, 0.992),
                    size = 8, hjust = 0, vjust = 1) +
    draw_plot_label(label = c("Level of concern about messages toward\npoliticians on social media",
                              "Desire for more government action to limit\ndisrespectful discourse on social media"),
                    x = c(0.04, 0.56), y = c(0.992, 0.992),
                    fontface = "plain",
                    size = 8, hjust = 0, vjust = 1)
)
dev.off()




# Appendix D
# "Finally, women politicians who indicate having personally experienced toxic
#  behavior themselves (not shown) are also more likely to express higher levels
#  of concern about toxicity toward politicians (66%) and desire for more
#  government action (67%) than their counterparts who are men and who have
#  also experienced such behavior (57%, 51% respectively)."

# Share with high concern, by gender, among politicians who report having
# experienced social media harassment (one row per respondent id)
V %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(resp_gender),
         !is.na(concern_disrespect_binary),
         resp_exposure_binary == "Experienced social media harassment",
         respondent_type == "Politician") %>%
  group_by(resp_gender, concern_disrespect_binary) %>%
  summarize(n = n()) %>%
  group_by(resp_gender) %>%
  mutate(percent = round(n / sum(n) * 100)) %>%
  mutate(resp_gender = recode(resp_gender, "Female" = "Women", "Male" = "Men")) %>%
  rename(category = resp_gender) %>%
  mutate(group = "By gender") %>%
  filter(concern_disrespect_binary == "High\nconcern") %>%
  data.frame()

# Share wanting more government action, by gender, for the same subgroup
V %>%
  distinct(id, .keep_all = TRUE) %>%
  filter(!is.na(resp_gender),
         !is.na(govt_action_binary),
         resp_exposure_binary == "Experienced social media harassment",
         respondent_type == "Politician") %>%
  group_by(resp_gender, govt_action_binary) %>%
  summarize(n = n()) %>%
  group_by(resp_gender) %>%
  mutate(percent = round(n / sum(n) * 100)) %>%
  mutate(resp_gender = recode(resp_gender, "Female" = "Women", "Male" = "Men")) %>%
  rename(category = resp_gender) %>%
  mutate(group = "By gender") %>%
  filter(govt_action_binary == "Want\nmore") %>%
  data.frame()



# Differences between local and national politicians in reported exposure to
# toxic behavior
# Footnote 7: "In our surveys of national and local politicians,
# 60% of national politicians and 42% of local politicians report exposure
# to online toxic behavior."
# NOTE: Data on whether a politician is at the local or national level are not
# provided in the replication archive to maintain anonymity

# First row of each respondent id, restricted to national and local
# politicians in Denmark and Chile
is_dk_cl_politician <- V$respondent_type_three %in% c("National politician", "Local politician") &
  V$country %in% c("Denmark", "Chile") &
  !duplicated(V$id)

# Politician level (rows) by reported exposure (columns)
toxic_exposure_table_politicians <- table(V$respondent_type_three[is_dk_cl_politician],
                                          V$resp_exposure_binary[is_dk_cl_politician])

# Row percentages: exposure shares within each politician level
prop.table(toxic_exposure_table_politicians, 1)*100
